In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gamma
from scipy.special import gamma as g
from scipy.special import gammaincc
from math import factorial, exp
from itertools import permutations, combinations
from sklearn.metrics.pairwise import rbf_kernel, laplacian_kernel

%matplotlib inline

In [2]:
import os
import sys

# Directory holding the project modules imported below. The default is the
# original author's checkout; set DSGA1005_CODE_DIR to run this notebook from
# another machine without editing the cell.
CODE_DIR = os.environ.get('DSGA1005_CODE_DIR', '/Users/mati/Devel/dsga1005/code')
sys.path.insert(0, CODE_DIR)

# NOTE(review): star imports hide where names come from. The cells below use
# HSIC_b, dHSIC and CI; import them explicitly once their defining modules
# are confirmed.
from independence_test import *
from r_independence import *

Test: independent variables

H0: Independent

H1: Not Independent

Should not reject H0


In [3]:
# Independent sample: X_1 is uniform on [0, 1) and Y_1 is standard normal,
# drawn separately, so H0 (independence) holds by construction.
# A dedicated seeded generator makes the draw repeatable on Restart & Run All
# without touching the global numpy random state.
size = 2000
rng = np.random.RandomState(0)
X_1 = rng.rand(size)
Y_1 = rng.randn(size)

In [4]:
# Build the HSIC_b test object for the independently generated pair
# (X_1, Y_1), selecting the 'exponential' kernel option.
test_ind = HSIC_b(X_1, Y_1, kernel='exponential')

In [5]:
# Run the test; the cell output is whatever empirical_test() returns
# (presumably the HSIC statistic -- confirm in the HSIC_b implementation).
test_ind.empirical_test()


Out[5]:
5.394588047922858e-06

In [6]:
# NOTE(review): the recorded output here is 0.0, which would reject H0 even
# though X_1 and Y_1 are generated independently, and dHSIC reports
# p ~ 0.49 on the same data. The dependent case further down records 1.0,
# so HSIC_b's p_value may be inverted -- verify before relying on it.
test_ind.p_value


Out[6]:
0.0

In [7]:
# Run the same independent pair through dHSIC as a cross-check of HSIC_b.
# Its .res attribute prints in R list format ($statistic, $crit.value, ...).
test_ind_r = dHSIC(X_1, Y_1)

In [28]:
# Show the full dHSIC result (statistic, critical value, p-value, timings).
# The call form of print works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
print(test_ind_r.res)


$statistic
[1] 0.2921431

$crit.value
[1] 0.6047159

$p.value
[1] 0.4910761

$time
GramMat   dHSIC CritVal 
  1.219   0.037   0.053 



In [8]:
# dHSIC test statistic as a Python float (matches $statistic in .res).
test_ind_r.statistic


Out[8]:
0.2921430944802239

In [9]:
# dHSIC p-value (matches $p.value in .res). The recorded value of ~0.49
# does not reject H0, as expected for independent data.
test_ind_r.p_value


Out[9]:
0.49107605699028706

Test: NOT independent variables

H0: Independent

H1: Not Independent

Should reject H0


In [18]:
# Fully dependent sample: Y_2 is the very same array as X_2, so H0
# (independence) is false by construction.
# A dedicated seeded generator makes the draw repeatable on Restart & Run All
# without touching the global numpy random state.
size = 200
rng = np.random.RandomState(1)
X_2 = rng.normal(0, 10, size)
Y_2 = X_2

In [19]:
# Build the HSIC_b test object for the dependent pair (Y_2 is X_2),
# selecting the 'exponential' kernel option.
test_non = HSIC_b(X_2, Y_2, kernel='exponential')

In [20]:
# Run the test; the cell output is whatever empirical_test() returns
# (presumably the HSIC statistic -- confirm in the HSIC_b implementation).
test_non.empirical_test()


Out[20]:
0.025254149673630218

In [21]:
# NOTE(review): the recorded output here is 1.0, which would NOT reject H0
# even though Y_2 is identical to X_2, and dHSIC reports p ~ 1.8e-132 on
# the same data. The independent case above records 0.0, so HSIC_b's
# p_value may be inverted -- verify before relying on it.
test_non.p_value


Out[21]:
1.0

In [22]:
# Run the same dependent pair through dHSIC as a cross-check of HSIC_b.
test_non_r = dHSIC(X_2, Y_2)

In [29]:
# Show the full dHSIC result (statistic, critical value, p-value, timings).
# The call form of print works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
print(test_non_r.res)


$statistic
[1] 20.15576

$crit.value
[1] 0.5838765

$p.value
[1] 1.843958e-132

$time
GramMat   dHSIC CritVal 
  0.009   0.001   0.001 



In [23]:
# dHSIC test statistic as a Python float (matches $statistic in .res).
test_non_r.statistic


Out[23]:
20.155764972379085

In [24]:
# dHSIC p-value (matches $p.value in .res). The recorded value of
# ~1.8e-132 rejects H0, as expected for dependent data.
test_non_r.p_value


Out[24]:
1.8439575315357636e-132

Conditional Independence Test

Conditionally independent variables

H0: Conditionally Independent

H1: Not Conditionally Independent

Should not reject H0


In [3]:
# Conditionally independent sample: X and Y are each Z plus their own
# unit-variance noise, so they are strongly related marginally but
# independent once Z is accounted for.
# A dedicated seeded generator makes the draw repeatable on Restart & Run All
# without touching the global numpy random state.
SIZE = 500
rng = np.random.RandomState(42)
Z = rng.randn(SIZE) * 100
X = Z + rng.randn(SIZE)
Y = Z + rng.randn(SIZE)

# NOTE(review): Z_vars is not used by the CI call in the next cell, which
# passes ['Z'] directly; kept in case later cells rely on it.
Z_vars = np.array(['Z'])

# One row per observation, one column per variable.
data = pd.DataFrame(np.column_stack([X, Y, Z]), columns=["X", "Y", "Z"])

In [4]:
# Test whether X and Y are independent given Z, using the columns of `data`.
# The 'cor' argument presumably selects a (partial-)correlation based test
# -- confirm in the CI implementation.
ci = CI('X', 'Y', ['Z'], data, 'cor')

In [6]:
# Test statistic reported by CI for X vs Y given Z.
ci.statistic


Out[6]:
0.04249053645171306

In [7]:
# p-value reported by CI. The recorded value of ~0.34 does not reject H0,
# as expected for conditionally independent data.
ci.p_value


Out[7]:
0.3435296910152168

Not conditionally independent variables

H0: Conditionally Independent

H1: Not Conditionally Independent

Should reject H0


In [8]:
# Conditionally dependent sample: Y is the very same array as X, and Z is
# drawn separately, so conditioning on Z cannot explain the X-Y relation.
# A dedicated seeded generator makes the draw repeatable on Restart & Run All
# without touching the global numpy random state.
SIZE = 500
rng = np.random.RandomState(2)
Z = rng.randn(SIZE) * 100
X = rng.randn(SIZE)
Y = X

# NOTE(review): Z_vars is not used by the CI call in the next cell, which
# passes ['Z'] directly; kept in case later cells rely on it.
Z_vars = np.array(['Z'])

# One row per observation, one column per variable. The X and Y columns are
# identical, so their correlation is exactly 1.
data = pd.DataFrame(np.column_stack([X, Y, Z]), columns=["X", "Y", "Z"])

In [9]:
# Test whether X and Y are independent given Z, using the columns of `data`.
# The 'cor' argument presumably selects a (partial-)correlation based test
# -- confirm in the CI implementation.
ci = CI('X', 'Y', ['Z'], data, 'cor')

In [11]:
# Test statistic reported by CI for X vs Y given Z.
# NOTE(review): Y is X itself here, so if this statistic is a partial
# correlation it would be expected to be +1, yet the recorded output is -1.0.
# Check the sign convention in CI, and whether the exactly collinear X/Y
# columns make the computation numerically degenerate.
ci.statistic


Out[11]:
-1.0

In [12]:
# p-value reported by CI. The recorded value of 0.0 rejects H0, as expected
# when Y is a copy of X.
ci.p_value


Out[12]:
0.0

In [ ]: